{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings\n",
    "from llama_index.core.embeddings import resolve_embed_model\n",
    "from llama_index.llms.mistral_rs import MistralRS\n",
    "from mistralrs import Which\n",
    "import sys\n",
    "\n",
    "documents = SimpleDirectoryReader(\"data\").load_data()\n",
    "\n",
    "# bge embedding model\n",
    "Settings.embed_model = resolve_embed_model(\"local:BAAI/bge-small-en-v1.5\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "Settings.llm = MistralRS(\n",
    "    which=Which.GGUF(\n",
    "        tok_model_id=\"mistralai/Mistral-7B-Instruct-v0.1\",\n",
    "        quantized_model_id=\"TheBloke/Mistral-7B-Instruct-v0.1-GGUF\",\n",
    "        quantized_filename=\"mistral-7b-instruct-v0.1.Q4_K_M.gguf\",\n",
    "        tokenizer_json=None,\n",
    "        repeat_last_n=64,\n",
    "    ),\n",
    "    max_new_tokens=4096,\n",
    "    context_window=1024 * 5,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "index = VectorStoreIndex.from_documents(\n",
    "    documents,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "query_engine = index.as_query_engine(streaming=True)\n",
    "response = query_engine.query(\"What are non-granular scalings?\")\n",
    "for text in response.response_gen:\n",
    "    print(text, end=\"\")\n",
    "    sys.stdout.flush()"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
